# -*- coding: utf-8 -*-
import re

# Location of the saved page to convert to plain text.
folder = '../output/'
file_name = '6'

# Patterns are compiled once at import time and reused by strip_html().
# <script> and <style> elements are removed together with their bodies,
# because their contents (JavaScript / CSS) are not page text.
_SCRIPT_RE = re.compile(r'(?is)<script[^>]*?>[\s\S]*?<\/script>')
_STYLE_RE = re.compile(r'(?is)<style[^>]*?>[\s\S]*?<\/style>')
# Comments are removed as a unit: the generic tag pattern below would stop at
# the first '>' inside a comment and leak the rest of it into the output.
_COMMENT_RE = re.compile(r'(?s)<!--.*?-->')
# Any remaining tag, including tags that span several lines.
_TAG_RE = re.compile(r'(?is)<.*?>')


def strip_html(html):
    """Return *html* with scripts, styles, comments and tags removed.

    This is a regex-based best-effort cleanup, not a real HTML parser:
    character entities such as ``&amp;`` are left as-is and whitespace is
    not normalised.

    Args:
        html: The markup to clean, as a ``str``.

    Returns:
        The text left after deleting ``<script>``/``<style>`` elements
        (including their contents), HTML comments, and every other tag.
    """
    # Order matters: scripts first (as the original script did), then styles
    # and comments, and only then the generic tag pattern.
    for pattern in (_SCRIPT_RE, _STYLE_RE, _COMMENT_RE):
        html = pattern.sub('', html)
    return _TAG_RE.sub('', html)


def main():
    """Read the configured page, strip its markup and print the text."""
    with open(folder + file_name, 'rt', encoding='utf-8') as f:
        html = f.read()
    print(strip_html(html))


if __name__ == '__main__':
    main()